* Paper: Don’t Republicans Tweet Too? Using Twitter to Assess the Consequences of Political Endorsements by Celebrities
* Purpose: Reproduce Tables 1, 2, 3, 4 in the paper

* Data In: celebrity_perspectives_data.dta
* Data Out: celebrity_analysis.log (session log), signif_tests.csv (Table 2 regression output); the input .dta is not re-saved
* Stata version used on 3/30/2019: 15.1

* ---------------------------------------------------------------
* Session setup: pin the Stata version, start a fresh log file,
* and load the dataset used by every table below.
* ---------------------------------------------------------------
version 15.1
clear
* Close any log left open by an earlier (possibly aborted) run; otherwise
* -log using- stops with "log file already open". -capture- makes this a
* no-op when no log is open.
capture log close
log using celebrity_analysis.log, replace
set more off

use "celebrity_perspectives_data.dta"

*************************
* How to produce TABLE 1
*************************
* Drop rows whose endorsement field is "unclear" or "none"
drop if inlist(endorsed, "unclear", "none")

* Constant 1 on every row, so that summing it counts observations
gen observation = 1

* Row 1:
tabstat observation, s(sum)
* Number of tweets: 222,801

* 1 if the tweet mentions at least one of the three candidates, else 0
gen any_candidate_mentioned = (mentionsHillary==1 | mentionsTrump==1 | mentionsBernie==1)

* Row 6:
tabstat observation if any_candidate_mentioned==1, s(sum)
* Number of tweets that mention one of the candidates (incl. RTs): 9,869

* Everything from here on uses non-retweets only
keep if is_retweet==0

* Row 2:
tabstat observation, s(sum)
* Number of tweets remaining: 160,361

* Numeric codes for the string identifiers. c_id supplies the i.c_id
* indicators in the Table 2 regressions; endorse_N is not referenced
* elsewhere in this script.
encode endorsed, gen(endorse_N)
encode screen_name, gen(c_id)

* Rows 3,4,5 of Table 1: tweet counts by endorsed candidate
foreach cand in Clinton Trump Sanders {
    tabstat observation if endorsed=="`cand'", s(sum)
}

* Row 7: candidate-mentioning tweets, retweets excluded
tabstat observation if any_candidate_mentioned==1, s(sum)


* ---------------------------------------------------------------
* Regression prep: calendar year of each tweet and a retweet count
* standardized within celebrity (screen_name).
* ---------------------------------------------------------------
* dofc() turns a datetime into a daily date.
* NOTE(review): assumes created_at is stored as a Stata %tc datetime - confirm.
generate date = dofc(created_at)
generate year = year(date)

* Per-celebrity mean and standard deviation of retweet counts
egen avg_celeb_RTs = mean(retweet_count), by(screen_name)
egen sd_celeb_RTs  = sd(retweet_count),   by(screen_name)

* z-score of each tweet's retweet count relative to its own author
generate standardized_rt = (retweet_count - avg_celeb_RTs) / sd_celeb_RTs

* Optional visual check (disabled):
*hist standardized_rt, by(mentionsTrump)

* Mean standardized and raw retweets for Clinton endorsers,
* split by whether the tweet mentions Hillary
tabstat standardized_rt retweet_count if endorsed=="Clinton", by(mentionsHillary) statistics(mean)


*****************
* PREPARE TABLE 2
*****************
* Four OLS models with robust standard errors: the pooled sample, then one
* model per endorsed candidate. All share the same specification:
* standardized retweets on the three candidate-mention indicators plus
* year and celebrity indicators.
* eststo/esttab are not built-in commands (estout package: ssc install estout).

local spec standardized_rt mentionsTrump mentionsHillary mentionsBernie i.year i.c_id

eststo clear
eststo: regress `spec', vce(robust)
foreach cand in Clinton Trump Sanders {
    eststo: regress `spec' if endorsed=="`cand'", vce(robust)
}

* Export all stored models: coefficients with t statistics, a single star at p<0.01
esttab using signif_tests.csv, t b(%5.2f) r2 nogaps obslast star(* 0.01) label replace

*****************
* TABLE 3
*****************
* Tweet counts by endorsed candidate x mentioned candidate
* (Clinton and Trump endorsers only)
foreach endorsee in Clinton Trump {
    foreach mention in mentionsTrump mentionsHillary mentionsBernie {
        tabstat observation if endorsed=="`endorsee'" & `mention'==1, statistics(sum)
    }
}

* Total tweet counts per endorsed candidate
foreach endorsee in Clinton Trump Sanders {
    tabstat observation if endorsed=="`endorsee'", statistics(sum)
}

*****************
* TABLE 4A
*****************
* Same counts as Table 3, restricted to tweets dated in 2016
local period year==2016

* Total tweet counts per endorsed candidate
foreach endorsee in Clinton Trump Sanders {
    tabstat observation if endorsed=="`endorsee'" & `period', statistics(sum)
}

* Counts by endorsed candidate x mentioned candidate
foreach endorsee in Clinton Trump {
    foreach mention in mentionsTrump mentionsHillary mentionsBernie {
        tabstat observation if endorsed=="`endorsee'" & `mention'==1 & `period', statistics(sum)
    }
}

*****************
* TABLE 4B
*****************
* Same counts as Table 4A, for tweets dated 2017 or later.
* Stata treats missing as larger than any number, so a bare year>=2017
* would also count tweets with a missing year; exclude them explicitly.
local period year>=2017 & !missing(year)

* Total tweet counts per endorsed candidate
foreach endorsee in Clinton Trump Sanders {
    tabstat observation if endorsed=="`endorsee'" & `period', s(sum)
}

* Counts by endorsed candidate x mentioned candidate
foreach endorsee in Clinton Trump {
    foreach mention in mentionsTrump mentionsHillary mentionsBernie {
        tabstat observation if endorsed=="`endorsee'" & `mention'==1 & `period', s(sum)
    }
}


* Non-political tweets: median and mean retweet count, by endorsed
* candidate, for tweets that mention none of the three candidates
local no_mention mentionsHillary==0 & mentionsTrump==0 & mentionsBernie==0
tabstat retweet_count if `no_mention', by(endorsed) statistics(median mean)

* Finish the session log and end the do-file
log close

exit
